# -*- coding: utf-8 -*-
import scrapy
from ..items import DingdianItem


class ShengxuSpiderSpider(scrapy.Spider):
    """Scrape the chapters of one novel from its booktxt.net index page.

    ``parse`` walks the chapter links found on the index page and schedules
    one request per chapter; ``parse_content`` turns each chapter page into
    a ``DingdianItem`` carrying ``title``, ``href`` and ``content``.
    """

    name = 'shengxu_spider'
    start_urls = ['https://www.booktxt.net/2_2219/']

    # Number of leading links in the index list that are ignored.
    # NOTE(review): presumably these are "latest chapters" shortcuts that
    # duplicate entries further down the list -- confirm against the page.
    skip_leading_links = 10

    def parse(self, response):
        """Yield one chapter request for every usable link on the index page.

        Args:
            response: The downloaded index page.

        Yields:
            scrapy.Request: A request for a chapter page, with the chapter
            ``title`` and resolved ``href`` passed along in ``meta``.
        """
        links = response.xpath("//div[@id='list']/dl/dd/a")
        for link in links[self.skip_leading_links:]:
            raw_href = link.xpath("./@href").get()
            if not raw_href:
                # An anchor without a target cannot be followed; skip it
                # instead of aborting the whole index with an IndexError.
                continue
            # Anchors without a text node get an empty title rather than
            # raising, so the chapter body is still collected.
            title = link.xpath("./text()").get(default='')
            # urljoin resolves relative, root-relative and absolute hrefs
            # against the page URL, unlike plain string concatenation.
            href = response.urljoin(raw_href)
            yield scrapy.Request(
                url=href,
                callback=self.parse_content,
                meta={'title': title, 'href': href},
            )

    def parse_content(self, response):
        """Build the item for a single chapter page.

        Args:
            response: The downloaded chapter page; ``response.meta`` must
                hold the ``title`` and ``href`` set by ``parse``.

        Yields:
            DingdianItem: The chapter title, its URL and its text, where the
            text nodes of the content ``div`` are joined with newlines.
        """
        title = response.meta['title']
        text_nodes = response.xpath("//div[@id='content']/text()").getall()

        item = DingdianItem()
        item['title'] = title
        item['href'] = response.meta['href']
        item['content'] = '\n'.join(text_nodes)

        # Progress output goes through Scrapy's logger so that it honours
        # the configured log level and handlers.
        self.logger.info('%s', title)
        yield item
